home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Cream of the Crop 25
/
Cream of the Crop 25.iso
/
os2
/
gnuwget.zip
/
wget-1.4.3
/
src
/
http.c
< prev
next >
Wrap
C/C++ Source or Header
|
1997-02-09
|
33KB
|
1,212 lines
/* HTTP support.
Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif /* HAVE_CONFIG_H */
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#ifdef HAVE_UNISTD_H
# include <unistd.h>
#endif
#include <sys/stat.h>
#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif
#include <ctype.h>
#include <assert.h>
#ifdef WINDOWS
# include <winsock.h>
#endif
#include "wget.h"
#include "options.h"
#include "utils.h"
#include "url.h"
#include "host.h"
#include "http.h"
#include "retr.h"
#include "connect.h"
#include "mtch.h"
#include "netrc.h"
/* Program version string, defined elsewhere; gethttp() sends it as the
   User-Agent value when opt.useragent is not set. */
extern char *version_string;
/* Global run-time options, defined elsewhere. */
extern struct options opt;
/* Declare errno by hand only when the system headers do not already
   provide it as a macro. */
#ifndef errno
extern int errno;
#endif
/* Likewise for h_errno, which gethttp() hands to herrmsg() when it
   reports a host error. */
#ifndef h_errno
extern int h_errno;
#endif
/* Read one header from descriptor FD into a freshly allocated buffer
   stored in *HDR.  The buffer starts at DYNAMIC_LINE_BUFFER bytes and
   is doubled whenever it fills up, so headers of any length fit.

   A header ends at LF (optionally preceded by CR).  When a non-empty
   header's LF is followed by SP or HT, the next line is a continuation
   (RFC2068) and reading goes on; the embedded line break is kept.

   On success the trailing LF/CRLF is removed, the string is
   zero-terminated and HOK is returned.  HEOF is returned on end of
   input and HERR on a read error; in both cases *HDR still points to
   the allocated buffer. */
uerr_t
fetch_next_header(int fd, char **hdr)
{
  int pos = 0;
  int capacity = DYNAMIC_LINE_BUFFER;
  int status;
  char lookahead;

  *hdr = (char *)nmalloc(capacity);
  for (;;)
    {
      /* Make room for the character about to be stored at POS. */
      if (pos > capacity - 1)
        {
          capacity <<= 1;
          *hdr = (char *)nrealloc(*hdr, capacity);
        }
      status = buf_readchar(fd, *hdr + pos);
      if (status == 0)
        return HEOF;
      if (status != 1)
        return HERR;
      if ((*hdr)[pos] == '\n')
        {
          int blank_line = (pos == 0 || (pos == 1 && (*hdr)[0] == '\r'));
          int continued = 0;

          if (!blank_line)
            {
              /* Only a non-empty header can be continued.  Peek at
                 the next character without consuming it. */
              status = buf_peek(fd, &lookahead);
              if (status == 0)
                return HEOF;
              else if (status == -1)
                return HERR;
              continued = (lookahead == '\t' || lookahead == ' ');
            }
          if (!continued)
            {
              /* End of header: cut off the LF, then a preceding CR. */
              (*hdr)[pos] = '\0';
              if (pos > 0 && (*hdr)[pos - 1] == '\r')
                (*hdr)[pos - 1] = '\0';
              break;
            }
        }
      ++pos;
    }
#ifdef DEBUG
  if (opt.debug)
    fprintf(opt.lfile, "%s\n", *hdr);
#endif
  return HOK;
}
/* Parse the HTTP status line, which has the format:

     HTTP-Version SP Status-Code SP Reason-Phrase

   where HTTP-Version is "HTTP/x.y" (x = major, y = minor version).

   Returns the three-digit status code, or -1 if the line is malformed
   or the major version is lower than 1.  On success *RP points at the
   reason phrase inside HDR (at the terminating NUL when the phrase is
   missing); on failure *RP is NULL. */
int
hparsestatline(const char *hdr, const char **rp)
{
  int mjr;                      /* HTTP major version (saturated). */
  int statcode;                 /* HTTP status code. */
  const char *p;

  *rp = NULL;
  if (strncmp(hdr, "HTTP/", 5) != 0)
    return -1;
  hdr += 5;

  /* Major version.  Only "is it at least 1" matters, so stop
     accumulating once the value is large; this keeps an absurdly long
     digit run from overflowing the int.  The casts keep isdigit()
     defined for bytes with the high bit set. */
  p = hdr;
  for (mjr = 0; isdigit((unsigned char)*hdr); hdr++)
    if (mjr < 1000)
      mjr = 10 * mjr + (*hdr - '0');
  if (*hdr != '.' || p == hdr)
    return -1;
  ++hdr;

  /* Minor version: must be present, but its value is ignored. */
  p = hdr;
  while (isdigit((unsigned char)*hdr))
    ++hdr;
  if (*hdr != ' ' || p == hdr)
    return -1;

  /* Wget accepts only HTTP versions 1.0 and higher. */
  if (mjr < 1)
    return -1;

  /* Skip the space. */
  ++hdr;
  if (!(isdigit((unsigned char)*hdr)
        && isdigit((unsigned char)hdr[1])
        && isdigit((unsigned char)hdr[2])))
    return -1;
  statcode = 100 * (*hdr - '0') + 10 * (hdr[1] - '0') + (hdr[2] - '0');

  /* RFC2068 requires a SP here even without a reason phrase.  Some
     servers/CGIs (incorrectly) drop it, so be liberal and allow the
     status line to end right after the code. */
  if (hdr[3] == ' ')
    *rp = hdr + 4;
  else if (!hdr[3])
    *rp = hdr + 3;
  else
    return -1;
  return statcode;
}
/* Count the leading linear white space (SP, HT, CR, LF) of HDR and
   return how many characters the caller should skip. */
int
hskip_lws(const char *hdr)
{
  const char *cursor = hdr;

  for (;;)
    {
      switch (*cursor)
        {
        case ' ':
        case '\t':
        case '\r':
        case '\n':
          ++cursor;
          break;
        default:
          return (int)(cursor - hdr);
        }
    }
}
/* If HDR is a Content-length header (it starts with CONTLEN_H, compared
   case-insensitively), return the decimal value that follows the
   optional white space.  Return -1 if HDR is some other header or if
   the value does not begin with a digit. */
long
hgetlen(const char *hdr)
{
  static const int l = 15;      /* strlen("content-length:") */
  long len;

  if (strncasecmp(hdr, CONTLEN_H, l))
    return -1;
  hdr += (l + hskip_lws(hdr + l));
  /* This also rejects an empty value.  The cast keeps isdigit()
     defined for bytes with the high bit set, which a server is free
     to send. */
  if (!isdigit((unsigned char)*hdr))
    return -1;
  for (len = 0; isdigit((unsigned char)*hdr); hdr++)
    len = 10 * len + (*hdr - '0');
  return len;
}
/* If HDR is a Content-range header (it starts with CONTRANGE_H,
   compared case-insensitively), return the first number of the range,
   i.e. the offset of the first byte the server is sending.  Return -1
   if HDR is some other header or if no number is found. */
long
hgetrange(const char *hdr)
{
  static const int l = 14;      /* strlen("content-range:") */
  long len;

  if (strncasecmp(hdr, CONTRANGE_H, l))
    return -1;
  hdr += (l + hskip_lws(hdr + l));
  if (!*hdr)
    return -1;
  /* Some proxy servers send Content-range without the "bytes"
     specifier, which is a breach of the HTTP/1.1 draft, but it has to
     be supported.  So the specifier is skipped only when present. */
  if (!strncasecmp(hdr, "bytes", 5))
    {
      hdr += 5;
      hdr += hskip_lws(hdr);
      if (!*hdr)
        return -1;
    }
  /* The cast keeps isdigit() defined for bytes with the high bit
     set. */
  if (!isdigit((unsigned char)*hdr))
    return -1;
  for (len = 0; isdigit((unsigned char)*hdr); hdr++)
    len = 10 * len + (*hdr - '0');
  return len;
}
/* If HDR is a Content-type header (it starts with CONTTYPE_H, compared
   case-insensitively), return a malloc-ed copy of its value up to, but
   not including, the first ';' (i.e. without any parameters).  Return
   NULL if HDR is some other header.

   HDR itself is never modified; the truncation is done in the copy. */
char *
hgettype(const char *hdr)
{
  static const int l = 13;      /* strlen("content-type:") */
  char *type, *semicolon;

  if (strncasecmp(hdr, CONTTYPE_H, l))
    return NULL;
  hdr += (l + hskip_lws(hdr + l));
  type = nstrdup(hdr);
  /* Cut the parameters off in our own copy, at the FIRST ';' as the
     contract says, rather than writing into the caller's header. */
  if (type && (semicolon = strchr(type, ';')) != NULL)
    *semicolon = '\0';
  return type;
}
/* If HDR starts with LOCATION_H (compared case-insensitively), return
   a malloc-ed copy of the header value with leading white space
   skipped; otherwise return NULL. */
char *
hgetlocation(const char *hdr)
{
  static const int l = 9;       /* strlen("location:") */
  const char *value;

  if (strncasecmp(hdr, LOCATION_H, l) != 0)
    return NULL;
  value = hdr + l;
  value += hskip_lws(value);
  return nstrdup(value);
}
/* If HDR starts with LASTMODIFIED_H (compared case-insensitively),
   return a malloc-ed copy of the date string that follows, with leading
   white space skipped; otherwise return NULL. */
char *
hgetmodified(const char *hdr)
{
  static const int l = 14;      /* strlen("last-modified:") */
  const char *value;

  if (strncasecmp(hdr, LASTMODIFIED_H, l) != 0)
    return NULL;
  value = hdr + l;
  value += hskip_lws(value);
  return nstrdup(value);
}
/* Return 1 if HDR starts with ACCEPTRANGES_H (compared
   case-insensitively) and its value contains the word "none"; return 0
   in every other case. */
int
haccepts_none(const char *hdr)
{
  static const int l = 14;      /* strlen("accept-ranges:") */
  const char *value;

  if (strncasecmp(hdr, ACCEPTRANGES_H, l) != 0)
    return 0;
  value = hdr + l;
  value += hskip_lws(value);
  return strstr(value, "none") != NULL;
}
/* Retrieve a document through the HTTP protocol.

   The function connects to U->host, sends a HEAD or GET request
   (depending on HEAD_ONLY in *DT), reads and parses the response
   headers and, unless only the head was wanted or the status is not a
   success, stores the body to U->local (or to opt.dfp).  The network
   socket is always closed before returning.  Errors are printed here
   when there is enough information to do so, and the error code is
   returned to the caller (i.e. http_loop).

   Various HTTP parameters are stored to HS: the status code and its
   description, content length, the Location and Last-modified values,
   the number of bytes read and the download time.  The strings left in
   HS (newloc, remote_time, error) belong to the caller.  On the HEOF,
   HERR and RANGEERR paths hs->newloc is released here and reset to
   NULL, so the caller never sees a dangling pointer.

   Flags in *DT are both read (HEAD_ONLY, SEND_NOCACHE, ACCEPTRANGES)
   and written (RETROKF, TEXTHTML, ACCEPTRANGES).

   Although the response code is parsed correctly, it is not
   interpreted much here; the caller can do that.

   If U->proxy is non-NULL, the URL U is taken as a proxy URL, and
   U->proxy->url is the one given to the proxy server (bad naming,
   I'm afraid). */
uerr_t
gethttp(urlinfo *u, http_stat_t *hs, int *dt)
{
  char *request, *hdr, *type, *command, *path;
  char *user, *passwd;
  const char *error;
  char *pragma_h, *referer, *useragent, *range, *wwwauth, *remhost;
  char *all_headers;
  int sock, hcount, num_written, all_length, remport, statcode;
  long contlen, contrange;
  urlinfo *ou;
  uerr_t err;
  FILE *fp;

  /* Let the others worry about local filename... */
  if (!(*dt & HEAD_ONLY))
    assert(u->local != NULL);

  /* Initialize certain elements of struct hstat.  statcode is
     included so that the caller reads a defined value even when the
     response contains no status line at all. */
  hs->len = 0L;
  hs->contlen = -1;
  hs->res = -1;
  hs->statcode = -1;
  hs->newloc = NULL;
  hs->remote_time = NULL;
  hs->error = NULL;

  /* Which structure to use to yield the original URL data. */
  if (u->proxy)
    ou = u->proxy;
  else
    ou = u;

  /* First: establish the connection. */
  if (opt.verbose)
    fprintf(opt.lfile, "Connecting to %s:%hu... ", u->host, u->port);
  err = make_connection(&sock, u->host, u->port);
  switch (err)
    {
    case HOSTERR:
      if (!opt.quiet)
        {
          if (opt.verbose)
            fprintf(opt.lfile, "\n");
          fprintf(opt.lfile, "%s: %s.\n", u->host, herrmsg(h_errno));
        }
      return HOSTERR;
    case CONSOCKERR:
      if (!opt.quiet)
        {
          if (opt.verbose)
            fprintf(opt.lfile, "\n");
          fprintf(opt.lfile, "socket: %s\n", mystrerror(errno));
        }
      return CONSOCKERR;
    case CONREFUSED:
      if (!opt.quiet)
        {
          if (opt.verbose)
            fprintf(opt.lfile, "\n");
          fprintf(opt.lfile, "Connection to %s:%hu refused.\n", u->host, u->port);
        }
      CLOSE(sock);
      return CONREFUSED;
    case CONERROR:
      if (!opt.quiet)
        {
          if (opt.verbose)
            fprintf(opt.lfile, "\n");
          fprintf(opt.lfile, "connect: %s\n", mystrerror(errno));
        }
      CLOSE(sock);
      return CONERROR;
    case NOCONERROR:
      /* Everything is fine! */
      if (opt.verbose)
        fprintf(opt.lfile, "connected!\n");
      break;
    default:
      assert(0);
      break;
    } /* switch */

  /* A proxy is given the full original URL, a server just the path. */
  if (u->proxy)
    path = u->proxy->url;
  else
    path = u->path;
  command = (*dt & HEAD_ONLY) ? "HEAD" : "GET";

  referer = NULL;
  if (ou->referer)
    {
      /* 9 for "Referer: ", 3 for CRLF and the terminating NUL. */
      referer = (char *)nmalloc(9 + strlen(ou->referer) + 3);
      sprintf(referer, "Referer: %s\r\n", ou->referer);
    }
  if (*dt & SEND_NOCACHE)
    pragma_h = "Pragma: no-cache\r\n";
  else
    pragma_h = "";
  if (hs->restval)
    {
      /* 13 for "Range: bytes=", 4 for "-", CRLF and the NUL. */
      range = (char *)nmalloc(13 + numdigit(hs->restval) + 4);
      sprintf(range, "Range: bytes=%ld-\r\n", hs->restval);
    }
  else
    range = NULL;
  useragent = opt.useragent ? opt.useragent : version_string;

  /* Construct the authentication, if userid is present. */
  user = ou->user;
  passwd = ou->passwd;
  search_netrc(u->host, (const char **)&user, (const char **)&passwd, 0);
  user = user ? user : opt.http_user;
  passwd = passwd ? passwd : opt.http_passwd;
  if (user && passwd)
    {
      char *t1, *t2;

      /* "user" + ':' + "passwd" + NUL.  The size must not depend on
         the password being non-empty. */
      t1 = (char *)nmalloc(strlen(user) + 1 + strlen(passwd) + 1);
      sprintf(t1, "%s:%s", user, passwd);
      t2 = base64_encode_line(t1);
      free(t1);
      /* 21 for "Authorization: Basic ", 3 for CRLF and the NUL. */
      wwwauth = (char *)nmalloc(strlen(t2) + 24);
      sprintf(wwwauth, "Authorization: Basic %s\r\n", t2);
      free(t2);
    }
  else
    wwwauth = NULL;

  remhost = ou->host;
  remport = ou->port;
  /* Allocate the memory for the request.  The trailing 60 covers the
     fixed text of the format below plus the terminating NUL. */
  request = (char *)nmalloc(strlen(command) + strlen(path)
                            + strlen(useragent)
                            + strlen(remhost) + numdigit(remport)
                            + strlen(HTTP_ACCEPT)
                            + (referer ? strlen(referer) : 0)
                            + (wwwauth ? strlen(wwwauth) : 0)
                            + (range ? strlen(range) : 0)
                            + strlen(pragma_h)
                            + (opt.user_header ? strlen(opt.user_header) : 0)
                            + 60);
  /* Construct the request. */
  sprintf(request, "%s %s HTTP/1.0\r\nUser-Agent: %s\r\nHost: %s:%d\r\nAccept: %s\r\n%s%s%s%s%s\r\n",
          command, path, useragent, remhost, remport, HTTP_ACCEPT,
          referer ? referer : "",
          wwwauth ? wwwauth : "",
          range ? range : "",
          pragma_h,
          opt.user_header ? opt.user_header : "");
  /* Free the temporary memory. */
  if (referer)
    free(referer);
  if (range)
    free(range);
  if (wwwauth)
    free(wwwauth);
  DEBUGP(request);

  /* Send the request to server */
  num_written = iwrite(sock, request, strlen(request));
  if (num_written != strlen(request))
    {
      if (opt.verbose)
        fprintf(opt.lfile, "Failed writing HTTP request.\n");
      free(request);
      CLOSE(sock);
      return WRITEFAILED;
    }
  if (opt.verbose)
    fprintf(opt.lfile, "%s request sent, fetching headers... ",
            u->proxy ? "HTTP proxy" : "HTTP");
  free(request);

  contlen = contrange = -1;
  type = NULL;
  statcode = -1;
  *dt &= ~RETROKF;
  /* Since this is a new connection, we may safely discard anything
     left in the buffer. */
  buf_discard();
  all_headers = NULL;
  all_length = 0;

  /* Header-fetching loop. */
  hcount = 0;
  for (;;)
    {
      ++hcount;
      /* Get the header. */
      err = fetch_next_header(sock, &hdr);
      /* Check for errors. */
      if (err == HEOF)
        {
          if (!opt.quiet)
            {
              if (opt.verbose)
                fprintf(opt.lfile, "\n");
              fprintf(opt.lfile, "End of file while parsing headers.\n");
            }
          free(hdr);
          if (type)
            free(type);
          if (hs->newloc)
            {
              free(hs->newloc);
              /* The caller inspects hs->newloc after we return. */
              hs->newloc = NULL;
            }
          if (all_headers)
            free(all_headers);
          CLOSE(sock);
          return HEOF;
        }
      else if (err == HERR)
        {
          if (!opt.quiet)
            {
              if (opt.verbose)
                fprintf(opt.lfile, "\n");
              fprintf(opt.lfile, "Read error (%s) in headers.\n",
                      mystrerror(errno));
            }
          free(hdr);
          if (type)
            free(type);
          if (hs->newloc)
            {
              free(hs->newloc);
              /* The caller inspects hs->newloc after we return. */
              hs->newloc = NULL;
            }
          if (all_headers)
            free(all_headers);
          CLOSE(sock);
          return HERR;
        }
      /* If the headers are to be saved to a file later, save them to
         memory now. */
      if (opt.save_headers)
        {
          int lh = strlen(hdr);
          /* Two extra bytes: the '\n' separator and the NUL. */
          all_headers = (char *)nrealloc(all_headers, all_length + lh + 2);
          memcpy(all_headers + all_length, hdr, lh);
          all_length += lh;
          all_headers[all_length++] = '\n';
          all_headers[all_length] = '\0';
        }
      /* Exit on empty header. */
      if (!*hdr)
        {
          free(hdr);
          break;
        }
      /* Print the header if necessary. */
      if (opt.verbose && opt.server_response)
        fprintf(opt.lfile, "\n%d %s", hcount, hdr);
      /* Check for errors documented in the first header. */
      if (hcount == 1)
        {
          /* Parse the first line of server response. */
          statcode = hparsestatline(hdr, &error);
          hs->statcode = statcode;
          /* Store the descriptive response. */
          if (statcode == -1) /* malformed response */
            hs->error = nstrdup("UNKNOWN");
          else if (!*error)
            hs->error = nstrdup("(no description)");
          else
            hs->error = nstrdup(error);
        }
      /* Try getting content-length. */
      if (contlen == -1 && !opt.ignore_length)
        contlen = hgetlen(hdr);
      /* Try getting content-type. */
      if (!type)
        type = hgettype(hdr);
      /* Try getting location. */
      if (!hs->newloc)
        hs->newloc = hgetlocation(hdr);
      /* Try getting last-modified. */
      if (!hs->remote_time)
        hs->remote_time = hgetmodified(hdr);
      /* Check for accept-ranges header.  If it contains the word
         `none', disable the ranges. */
      if (*dt & ACCEPTRANGES)
        if (haccepts_none(hdr))
          *dt &= ~ACCEPTRANGES;
      /* Try getting content-range. */
      if (contrange == -1)
        contrange = hgetrange(hdr);
      /* Free the current header. */
      free(hdr);
    } /* for (;;) */

  /* 20x responses are counted among successful by default. */
  if (H_20X(statcode))
    *dt |= RETROKF;

  /* NOT text/html by default. */
  if (type && !strncasecmp(type, TEXTHTML_S, strlen(TEXTHTML_S)))
    *dt |= TEXTHTML;
  else
    *dt &= ~TEXTHTML;

  if (contrange == -1)
    hs->restval = 0;
  /* NOTE(review): the H_PARTIAL clause below can never be true,
     because contrange != -1 on this branch.  It is left as is since
     the intended handling of a 206 without Content-range is not
     evident from this file. */
  else if (contrange != hs->restval ||
           (H_PARTIAL(statcode) && contrange == -1))
    {
      /* This means the whole request was somehow misunderstood by the
         server.  Bail out. */
      if (type)
        free(type);
      if (hs->newloc)
        {
          free(hs->newloc);
          /* The caller inspects hs->newloc after we return. */
          hs->newloc = NULL;
        }
      if (all_headers)
        free(all_headers);
      CLOSE(sock);
      return RANGEERR;
    }

  if (hs->restval)
    {
      if (contlen != -1)
        contlen += contrange;
      else
        contrange = -1;         /* If content-length was not sent,
                                   content-range will be ignored. */
    }
  hs->contlen = contlen;

  if (opt.verbose)
    {
      if (!opt.server_response)
        fprintf(opt.lfile, "done.");
      fprintf(opt.lfile, "\n");
    }

  /* Return if redirected. */
  if (H_REDIRECTED(statcode) || statcode == HTTP_MULTIPLE_CHOICES)
    {
      /* RFC2068 says that in case of the 300 (multiple choices)
         response, the server can output a preferred URL through
         `Location' header; otherwise, the request should be treated
         like GET.  So, if the location is set, it will be a
         redirection; otherwise, just proceed normally. */
      if (statcode == HTTP_MULTIPLE_CHOICES && !hs->newloc)
        *dt |= RETROKF;
      else
        {
          fprintf(opt.lfile, "Location: %s%s\n",
                  hs->newloc ? hs->newloc : "unspecified",
                  hs->newloc ? " [following]" : "");
          CLOSE(sock);
          if (all_headers)
            free(all_headers);
          if (type)
            free(type);
          /* hs->newloc is deliberately kept for the caller. */
          return NEWLOCATION;
        }
    }

  if (opt.verbose)
    {
      if ((*dt & RETROKF) && !opt.server_response)
        {
          /* No need to print this output if the body won't be
             downloaded at all, or if the original server response is
             printed. */
          fprintf(opt.lfile, "Length: ");
          if (contlen != -1)
            {
              fprintf(opt.lfile, "%s", legible(contlen));
              if (contrange != -1)
                fprintf(opt.lfile, " (%s to go)", legible(contlen - contrange));
            }
          else
            fputs(opt.ignore_length ? "ignored" : "unspecified", opt.lfile);
          if (type)
            fprintf(opt.lfile, " [%s]\n", type);
          else
            fprintf(opt.lfile, "\n");
        }
    }
  if (type)
    free(type);
  type = NULL;                  /* We don't need it any more. */

  /* Return if we have no intention of further downloading. */
  if (!(*dt & RETROKF) || (*dt & HEAD_ONLY))
    {
      /* In case someone cares to look... */
      hs->len = 0L;
      hs->res = 0;
      if (all_headers)
        free(all_headers);
      CLOSE(sock);
      return RETRFINISHED;
    }

  /* Open the local file. */
  if (!opt.dfp)
    {
      mkalldirs(u->local);
      /* Append when restarting, otherwise start from scratch. */
      fp = fopen(u->local, hs->restval ? "ab" : "wb");
      if (!fp)
        {
          if (!opt.quiet)
            fprintf(opt.lfile, "%s: %s\n", u->local, mystrerror(errno));
          CLOSE(sock);
          if (all_headers)
            free(all_headers);
          return FOPENERR;
        }
    }
  else /* opt.dfp */
    fp = opt.dfp;

  if (opt.save_headers)
    fwrite(all_headers, 1, all_length, fp);
  reset_timer();
  /* Get the contents of the document. */
  hs->res = get_contents(sock, fp, &hs->len, hs->restval, 0);
  hs->dltime = elapsed_time();
  if (!opt.dfp)
    fclose(fp);
  else
    fflush(fp);
  if (all_headers)
    free(all_headers);
  CLOSE(sock);
  /* get_contents() result -2 is reported as a local write failure. */
  if (hs->res == -2)
    return FWRITEERR;
  return RETRFINISHED;
}
/* The genuine HTTP loop!  This is the part where the retrieval is
   retried, and retried, and retried, and...

   U describes the document to fetch; U->local is filled in here if it
   is not set yet.  *DT receives the document-type flags (RETROKF,
   TEXTHTML, ...).  *NEWLOC is set to NULL on entry and, whenever the
   server sent a Location header, to a malloc-ed copy of it, which the
   caller owns.

   With time-stamping enabled and a local file present, a HEAD request
   is made first and the download is skipped if the local copy has the
   same size and is not older than the remote one.

   Returns RETROK on success (including "nothing to do"), NEWLOCATION
   on redirection, WRONGCODE on an unsuccessful status code,
   TRYLIMEXC when opt.ntry attempts were used up, or one of the fatal
   error codes passed on from gethttp(). */
uerr_t
http_loop(urlinfo *u, char **newloc, int *dt)
{
  static int first_retrieval = 1;
  int count;
  int use_ts, got_head = 0;     /* Time-stamping info. */
  char *tms, *suf, *locf, *tmrate;
  uerr_t err;
  time_t tml = -1, tmr = -1;    /* Local and remote time-stamps. */
  long local_size = 0;          /* The size of the local file. */
  http_stat_t hstat;            /* HTTP status. */
  struct stat st;
  void my_touch PARAMS((char *, time_t));

  *newloc = NULL;

  /* Warn on wildcard usage in HTTP.  Don't use has_wildcards because
     it would also warn on '?', and we don't want that because of
     CGI. */
  if (opt.verbose && strchr(u->url, '*'))
    fprintf(opt.lfile, "Warning: wildcards not supported in HTTP.\n");

  /* Determine the local filename. */
  if (!u->local)
    {
      if (!opt.timestamping || opt.recursive)
        u->local = url_filename(u->proxy ? u->proxy : u);
      else /* opt.timestamping && !recursive */
        {
          if (*(u->proxy ? u->proxy->file : u->file))
            u->local = nstrdup(u->proxy ? u->proxy->file : u->file);
          else
            u->local = nstrdup("index.html");
        }
    }
  /* LOCF is only the name shown in messages. */
  if (!opt.output_document)
    locf = u->local;
  else
    locf = opt.output_document;

  if (opt.noclobber && exists(u->local))
    {
      /* If opt.noclobber is turned on and file already exists, do not
         retrieve the file */
      if (opt.verbose)
        fprintf(opt.lfile, "File `%s' already there, will not retrieve.\n",
                u->local);
      /* If the file is there, we suppose it's retrieved OK. */
      *dt |= RETROKF;
      /* If its suffix is "html" or (yuck!) "htm", we suppose it's
         text/html, a harmless lie. */
      if (((suf = suffix(u->local)) != NULL)
          && (!strcmp(suf, "html") || !strcmp(suf, "htm")))
        *dt |= TEXTHTML;
      free(suf);
      /* Another harmless lie: */
      return RETROK;
    }

  use_ts = 0;
  if (opt.timestamping)
    {
      if (stat(u->local, &st) == 0)
        {
          use_ts = 1;
          tml = st.st_mtime;
          local_size = st.st_size;
          got_head = 0;
        }
    }
  /* Reset the counter. */
  count = 0;
  *dt = 0 | ACCEPTRANGES;
  /* THE loop */
  do
    {
      /* Increment the pass counter. */
      ++count;
      /* Wait before the retrieval (unless this is the very first
         retrieval). */
      if (!first_retrieval && opt.wait)
        sleep(opt.wait);
      if (first_retrieval)
        first_retrieval = 0;
      /* Get the current time string. */
      tms = time_str(NULL);
      /* Print fetch message, if opt.verbose. */
      if (opt.verbose)
        {
          char *hurl = str_url(u->proxy ? u->proxy : u, 1);
          /* Large enough for "(try:" + any int + ")" + NUL. */
          char tmp[32];

          strcpy(tmp, " ");
          if (count > 1)
            sprintf(tmp, "(try:%2d)", count);
          fprintf(opt.lfile,
                  "--%s-- %s\n %s => `%s'\n",
                  tms, hurl, tmp, locf);
          free(hurl);
        }

      /* Default document type is empty.  However, if spider mode is
         on or time-stamping is employed, HEAD_ONLY commands is
         encoded within *dt. */
      if (opt.spider || (use_ts && !got_head))
        *dt |= HEAD_ONLY;
      else
        *dt &= ~HEAD_ONLY;
      /* Assume no restarting. */
      hstat.restval = 0L;
      /* Decide whether or not to restart. */
      if (((count > 1 && (*dt & ACCEPTRANGES)) || opt.always_rest)
          && exists(u->local))
        if (stat(u->local, &st) == 0)
          hstat.restval = st.st_size;
      /* Decide whether to send the no-cache directive. */
      if (u->proxy && (count > 1 || (opt.proxy_cache == 0)))
        *dt |= SEND_NOCACHE;
      else
        *dt &= ~SEND_NOCACHE;

      /* Try fetching the document, or at least its head. :-) */
      err = gethttp(u, &hstat, dt);
      /* Time? */
      tms = time_str(NULL);
      /* Get the new location (with or without the redirection).
         Release the copy made on an earlier pass, so that retries do
         not leak it. */
      if (hstat.newloc)
        {
          if (*newloc)
            free(*newloc);
          *newloc = nstrdup(hstat.newloc);
        }
      switch (err)
        {
        case HERR: case HEOF: case CONSOCKERR: case CONCLOSED:
        case CONERROR: case READERR: case WRITEFAILED:
        case RANGEERR:
          /* Non-fatal errors continue executing the loop, which
             will bring them to "while" statement at the end, to
             judge whether the number of tries was exceeded. */
          FREEHSTAT(hstat);
          printwhat(count, opt.ntry);
          continue;
        case HOSTERR: case CONREFUSED: case PROXERR:
          /* Fatal errors just return from the function. */
          FREEHSTAT(hstat);
          return err;
        case FWRITEERR: case FOPENERR:
          /* Another fatal error. */
          if (!opt.quiet)
            {
              if (opt.verbose)
                fprintf(opt.lfile, "\n");
              fprintf(opt.lfile, "Cannot write to `%s' (%s).\n",
                      u->local, mystrerror(errno));
            }
          FREEHSTAT(hstat);
          return err;
        case NEWLOCATION:
          /* Return the new location to the caller. */
          if (!hstat.newloc)
            {
              if (!opt.quiet)
                fprintf(opt.lfile,
                        "ERROR: Redirection (%d) without location.\n",
                        hstat.statcode);
              /* The other strings in hstat still need releasing. */
              FREEHSTAT(hstat);
              return WRONGCODE;
            }
          FREEHSTAT(hstat);
          return NEWLOCATION;
        case RETRFINISHED:
          /* Deal with you later. */
          break;
        default:
          /* All possibilities should have been exhausted. */
          assert(0);
        }

      if (!(*dt & RETROKF))
        {
          if (!opt.quiet)
            {
              /* hstat.error stays NULL when the response had no status
                 line at all; never hand NULL to %s. */
              fprintf(opt.lfile, "%s ERROR %d: %s.\n", tms, hstat.statcode,
                      hstat.error ? hstat.error : "UNKNOWN");
              if (opt.verbose)
                fputc('\n', opt.lfile);
            }
          FREEHSTAT(hstat);
          return WRONGCODE;
        }

      /* Did we get the time-stamp? */
      if (!got_head)
        {
          if (opt.timestamping && !hstat.remote_time)
            {
              if (!opt.quiet)
                fprintf(opt.lfile, "Last-modified header missing -- time-stamps turned off.\n");
            }
          else if (hstat.remote_time)
            {
              /* Convert the date-string into time_t. */
              tmr = http_atotm(hstat.remote_time);
              if (tmr == -1)
                if (opt.verbose)
                  fprintf(opt.lfile, "Last-modified header invalid -- time-stamp ignored.\n");
            }
        }

      /* The time-stamping section. */
      if (use_ts)
        {
          got_head = 1;
          *dt &= ~HEAD_ONLY;
          use_ts = 0;           /* No more time-stamping. */
          count = 0;            /* The retrieve count for HEAD is
                                   reset. */
          if (hstat.remote_time && tmr != -1)
            {
              /* Now time-stamping can be used validly.  Time-stamping
                 means that if the sizes of the local and remote file
                 match, and local file is newer than the remote file,
                 it will not be retrieved.  Otherwise, the normal
                 download procedure is resumed. */
              if (local_size == hstat.contlen && tml >= tmr)
                {
                  if (opt.verbose)
                    fprintf(opt.lfile, "Local file `%s' is more recent, not retrieving.\n\n", u->local);
                  FREEHSTAT(hstat);
                  return RETROK;
                }
              else if (local_size != hstat.contlen)
                {
                  if (opt.verbose)
                    fprintf(opt.lfile, "The sizes do not match (local %ld), retrieving.\n", local_size);
                }
              else
                {
                  if (opt.verbose)
                    fprintf(opt.lfile, "Remote file is newer, retrieving.\n");
                }
            }
          FREEHSTAT(hstat);
          continue;
        } /* use_ts */

      /* Stamp the local file with the remote time when the download
         is considered complete. */
      if (!opt.dfp
          && (tmr != -1) && !opt.spider &&
          ((hstat.len == hstat.contlen) ||
           ((hstat.res == 0) &&
            ((hstat.contlen == -1) ||
             (hstat.len >= hstat.contlen && !opt.kill_longer)))))
        {
          my_touch(u->local, tmr);
        }
      /* End of time-stamping section. */

      if (opt.spider)
        {
          fprintf(opt.lfile, "%d %s\n\n", hstat.statcode, hstat.error);
          FREEHSTAT(hstat);
          return RETROK;
        }

      /* It is now safe to free the remainder of hstat, since the
         strings within it will no longer be used.  The numeric fields
         read below are not affected. */
      FREEHSTAT(hstat);

      tmrate = rate(hstat.len - hstat.restval, hstat.dltime);

      if (hstat.len == hstat.contlen)
        {
          if (*dt & RETROKF)
            {
              if (opt.verbose)
                fprintf(opt.lfile, "%s (%s) - `%s' saved [%ld/%ld]\n\n",
                        tms, tmrate, locf, hstat.len, hstat.contlen);
              else if (!opt.quiet)
                fprintf(opt.lfile, "%s URL:%s [%ld/%ld] -> \"%s\" [%d]\n",
                        tms, u->url, hstat.len, hstat.contlen, locf, count);
            }
          ++opt.numurls;
          opt.downloaded += hstat.len;
          return RETROK;
        }
      else if (hstat.res == 0) /* No read error */
        {
          if (hstat.contlen == -1) /* We don't know how much we were
                                      supposed to get, so... */
            {
              if (*dt & RETROKF)
                {
                  if (opt.verbose)
                    fprintf(opt.lfile, "%s (%s) - `%s' saved [%ld]\n\n",
                            tms, tmrate, locf, hstat.len);
                  else if (!opt.quiet)
                    fprintf(opt.lfile, "%s URL:%s [%ld] -> \"%s\" [%d]\n",
                            tms, u->url, hstat.len, locf, count);
                }
              ++opt.numurls;
              opt.downloaded += hstat.len;
              return RETROK;
            }
          else if (hstat.len < hstat.contlen) /* Meaning we lost the
                                                 connection too soon */
            {
              if (opt.verbose)
                {
                  fprintf(opt.lfile, "%s (%s) - Connection closed at byte %ld. ",
                          tms, tmrate, hstat.len);
                  printwhat(count, opt.ntry);
                }
              continue;
            }
          else if (!opt.kill_longer) /* Meaning we got more than expected */
            {
              if (opt.verbose)
                fprintf(opt.lfile, "%s (%s) - `%s' saved [%ld/%ld]\n\n",
                        tms, tmrate, locf, hstat.len, hstat.contlen);
              else if (!opt.quiet)
                fprintf(opt.lfile, "%s URL:%s [%ld/%ld] -> \"%s\" [%d]\n",
                        tms, u->url, hstat.len, hstat.contlen, locf, count);
              ++opt.numurls;
              opt.downloaded += hstat.len;
              return RETROK;
            }
          else /* The same, but not accepted */
            {
              if (opt.verbose)
                {
                  fprintf(opt.lfile,
                          "%s (%s) - Connection closed at byte %ld/%ld. ",
                          tms, tmrate, hstat.len, hstat.contlen);
                  printwhat(count, opt.ntry);
                }
              continue;
            }
        }
      else /* Now hstat.res can only be -1 */
        {
          if (hstat.contlen == -1)
            {
              if (opt.verbose)
                {
                  fprintf(opt.lfile,
                          "%s (%s) - Read error at byte %ld (%s). ",
                          tms, tmrate, hstat.len, mystrerror(errno));
                  printwhat(count, opt.ntry);
                }
              continue;
            }
          else /* hstat.res == -1 and contlen is given */
            {
              if (opt.verbose)
                {
                  fprintf(opt.lfile,
                          "%s (%s) - Read error at byte %ld/%ld (%s). ",
                          tms, tmrate, hstat.len, hstat.contlen,
                          mystrerror(errno));
                  printwhat(count, opt.ntry);
                }
              continue;
            }
        }
      /* not reached */
      break;
    } while (!opt.ntry || (count < opt.ntry));
  return TRYLIMEXC;
}
/* Encode the zero-terminated string S in base64 and return the
   malloc-ed, zero-terminated result, padded with '=' to a multiple of
   four characters.  This is useful for HTTP only.

   Note that the string may not contain NUL characters, since its
   length is taken with strlen().  Bytes with the high bit set are
   handled correctly: the input is read as unsigned char. */
char *
base64_encode_line(const char *s)
{
  /* Conversion table. */
  static const char tbl[64] = {
    'A','B','C','D','E','F','G','H',
    'I','J','K','L','M','N','O','P',
    'Q','R','S','T','U','V','W','X',
    'Y','Z','a','b','c','d','e','f',
    'g','h','i','j','k','l','m','n',
    'o','p','q','r','s','t','u','v',
    'w','x','y','z','0','1','2','3',
    '4','5','6','7','8','9','+','/'
  };
  /* Plain char may be signed, which would yield negative table
     indices for bytes >= 0x80. */
  const unsigned char *in = (const unsigned char *)s;
  int len, i;
  char *res, *p;

  len = strlen(s);
  /* Four output characters per (possibly partial) 3-byte group, plus
     the terminating NUL. */
  res = (char *)nmalloc(4 * ((len + 2) / 3) + 1);
  p = res;

  /* Transform the 3x8 bits to 4x6 bits, as required by base64.  Only
     complete groups are handled here, so nothing beyond the
     terminating NUL is ever read. */
  for (i = 0; i + 2 < len; i += 3, in += 3)
    {
      *p++ = tbl[in[0] >> 2];
      *p++ = tbl[((in[0] & 3) << 4) | (in[1] >> 4)];
      *p++ = tbl[((in[1] & 0xf) << 2) | (in[2] >> 6)];
      *p++ = tbl[in[2] & 0x3f];
    }

  /* Encode the one or two remaining bytes and pad the group. */
  if (len - i == 1)
    {
      *p++ = tbl[in[0] >> 2];
      *p++ = tbl[(in[0] & 3) << 4];
      *p++ = '=';
      *p++ = '=';
    }
  else if (len - i == 2)
    {
      *p++ = tbl[in[0] >> 2];
      *p++ = tbl[((in[0] & 3) << 4) | (in[1] >> 4)];
      *p++ = tbl[(in[1] & 0xf) << 2];
      *p++ = '=';
    }

  /* ...and zero-terminate it. */
  *p = '\0';
  return res;
}
/* Convert the broken-down time T to time_t, treating T as UTC rather
   than as local time (which is what mktime assumes).

   mktime() is first applied to T as it stands.  The result is then
   pushed through gmtime() and mktime() once more; the difference
   between the two values is the local zone's offset, which is applied
   to the first value in the opposite direction.

   Contributed by Roger Beeman <beeman@cisco.com>. */
time_t
mktime_from_utc(struct tm *t)
{
  time_t as_local = mktime(t);
  time_t shifted = mktime(gmtime(&as_local));

  if (as_local <= shifted)
    return as_local + (as_local - shifted);
  return as_local - (shifted - as_local);
}
/* Convert the ASCII date S to time_t.  The date can be in any of the
   three formats HTTP servers are allowed to send, as per RFC2068:
   RFC1123-date, RFC850-date or asctime-date.  They are tried in that
   order.

   strptime() is used to recognize the dates, which makes the parser a
   little bit slacker than the RFCs (e.g. it always allows shortened
   dates and months, one-digit days, etc.).  It also allows more than
   one space anywhere the specs require one SP.  The routine should
   probably be even slacker (RFC2068 recommends this).

   Returns the computed time_t value, interpreting the date as UTC, or
   -1 if none of the formats matches. */
time_t
http_atotm(char *s)
{
  /* NOTE: `%n' is not used for white space, as OSF's strptime uses it
     to eat all white space up to (and including) a newline, and the
     function fails (!) if there is no newline.  Let's hope all
     strptime-s use ` ' to skip *all* whitespace instead of just one
     (it works that way on all the systems it was tested on). */
  static const char *const formats[] = {
    "%a, %d %b %Y %T",          /* RFC1123 date */
    "%a, %d-%b-%y %T",          /* RFC850 date */
    "%a %b %d %T %Y"            /* asctime date */
  };
  struct tm parsed;
  unsigned int k;

  parsed.tm_isdst = -1;
  for (k = 0; k < sizeof formats / sizeof formats[0]; k++)
    {
      if (strptime(s, formats[k], &parsed))
        return mktime_from_utc(&parsed);
    }
  /* Failure. */
  return -1;
}